*****************************************************************
* Replication directory for                                   ***
* Prime locations                                             ***
* by Gabriel M. Ahlfeldt, Thilo N.H. Albers, Kristian Behrens ***
* Published in American Economic Review: Insights             ***
*****************************************************************
* 01/2025
* Stata
version 17.0

* This do file prepares the grid-level data set that will be used when processing the cluster data to generate PLs
* It insheets the results from the clustering algorithm
* Merges information from the original grids
* This file processes clusters based on big data establishments with weights from the first half of the alphabet

* Create the top-level output folder for the files saved by this do file
* (capture suppresses the error raised when the folder already exists)
	capture mkdir "$dataoutput/USMetroGridEmp"

* CBSA codes of the metros for which employment is predicted.
* The weights-creation sample (first half of the alphabet) excludes the median, 33460,
* so that code is part of this list.
	local USMETROIDS ///
		33460 34980 35620 36740 37980 ///
		38060 38300 38900 39300 40140 ///
		40900 41180 41700 41740 41860 ///
		41940 42660 45300 47260 47900
	
* Begin loop by employment type
* For each employment type (currently only type 0) the loop
*   (1) cleans the clustering-algorithm output of every CBSA listed in USMETROIDS,
*       merges it with that CBSA's grid legend and saves one file per CBSA;
*   (2) appends the per-CBSA files into one data set for the employment type.
	foreach EmpType of numlist 0 {
		* Generate workspace for outputs (capture: the folder may already exist)
		capture mkdir "$dataoutput/USMetroGridEmp/EmpType_`EmpType'"
		local count = 1	// running number of the CBSA; only used in the progress message

		* Begin loop by metro to process metro outputs
		foreach USmid of local USMETROIDS {
			di "...Working on employment type `EmpType' and CBSA `USmid', number `count'"

			* Import data (columns are referred to below by their default names v1, v2, ...)
			qui import delimited "$data_USMETROS/CLUSTER OUTPUT/cells_validation_first/gridcells_`USmid'_`EmpType'_output.txt", clear

			* Process variables
			qui drop v1 v2 v3 v4 v5
			* Rescale the four cell-corner coordinates by 180/pi (the radians-to-degrees factor)
			qui foreach var of varlist v6 v7 v8 v9 {
				replace `var' = `var'*180/_pi
			}
			qui ren  v6 lon_UL
			qui ren  v8 lon_LR
			qui ren  v7 lat_UL
			qui ren  v9 lat_LR
			* NOTE(review): v10 is neither renamed nor dropped, so it stays in the saved data as v10
			qui ren v11 cell_MFG_emp 	// emp_mfg_wholesale
			qui ren v12 cell_NT_emp 	// emp_nontradable_services
			qui ren v13 cell_PS_emp 	// emp_public_services
			qui ren v14 cell_TS_emp 	// emp_tradable_services
			qui ren v15 cell_O_emp 		// emp_others
			qui ren v16 cell_ST_emp 	// cell_emp_search_terms
			qui ren v17 cell_total_emp
			qui drop v18
			qui ren v19 clusterID
			qui drop v20
			qui ren v21 cell_id
			qui drop v22 v23			// dropping developable here since it will be merged from the grid data later on

			* Generate cbsafp ID: the first underscore-separated part of cell_id
			qui split cell_id , p(_)
			* Discard the remaining parts; capture because they may not exist
			* NOTE(review): a fourth or later part, if any, would be left in the data - confirm cell_id has at most three parts
			capture drop cell_id2
			capture drop cell_id3
			qui destring cell_id1, replace force	// force: non-numeric entries become missing
			qui ren cell_id1 cbsafp

			* Merge legend features
			qui duplicates drop cell_id, force // need to deal with occasional duplicates
			* keep(match using) discards cells that appear only in the algorithm output:
			* there are cells outside the CBSA since employment is read from NETS in squares
			* and we are merging from the entire grid universe.
			* nogenerate avoids creating the merge indicator, so nothing below depends on
			* abbreviating its name (which fails when variable abbreviation is switched off).
			qui merge 1:1 cell_id using "$temp/GridLegend/GRID_`USmid'_final.dta", ///
				keepusing(cbsafp square_id metro_id area_g developable lon lat metro_name grid_x grid_y) ///
				keep(match using) nogenerate

			* Zero employment cells are missing in algorithm output, so cells that come
			* only from the grid legend get 0 for the cluster ID and all employment counts
			qui foreach var of varlist clusterID cell_*_emp {
				replace `var' = 0 if `var' == .
			}

			* Save CBSA data
			qui compress // find memory efficient formats
			qui save "$dataoutput/USMetroGridEmp/EmpType_`EmpType'/EmpGrid_`USmid'", replace
			local ++count
		}
		* Loop by metro to process metro outputs ends

		* Append data: start from an empty data set and stack the per-CBSA files
		clear
		foreach USmid of local USMETROIDS {
			display "...appending CBSA `USmid', employment type `EmpType' metro-grid data..."
			append using "$dataoutput/USMetroGridEmp/EmpType_`EmpType'/EmpGrid_`USmid'"
		}

		* Save data sets
		display "..finalizing `EmpType' metro-grid data..."
		qui duplicates drop	// remove observations that are identical on all variables
		qui save "$dataoutput/USMetroGridEmp/EmpType_`EmpType'/EmpGrid__all.dta", replace
	}
	
* Script ends